package lib

// Base packages.

import (
	"github.com/henrylee2cn/pholcus/app/downloader/request" //必需
	. "github.com/henrylee2cn/pholcus/app/spider"           //必需
	"github.com/henrylee2cn/pholcus/common/goquery"         //DOM解析
	"net/http"
	"strings"
)

// positionURL is the listing page the crawl starts from. Change it to the
// listing URL of another position to crawl data for that position instead.
const positionURL = "https://www.lagou.com/zhaopin/go/?filterOption=3"

// init fills the package-level header with a fixed set of browser-like
// request headers (including a captured session cookie) and then calls
// lagou.Register().
func init() {
	// Name/value pairs are added in the order listed.
	browserHeaders := [][2]string{
		{"Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3"},
		{"Accept-Encoding", "gzip, deflate, br"},
		{"Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8,ko;q=0.7"},
		{"Cache-Control", "max-age=0"},
		{"Connection", "keep-alive"},
		{"Cookie", "user_trace_token=20190531140819-7cb3f380-836a-11e9-ac96-525400332722; JSESSIONID=ABAAABAABEEAAJAE928B8193812F4B55584AFA41283E0EF; _ga=GA1.2.1357956302.1562071324; _gid=GA1.2.952628591.1562071324; Hm_lvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1562071324; LGSID=20190702204203-cb0c4a53-9cc6-11e9-a4d5-5254005c3644; PRE_UTM=; PRE_HOST=; PRE_SITE=https%3A%2F%2Fsec.lagou.com%2Fverify.html%3Fe%3D2%26f%3Dhttps%3A%2F%2Fwww.lagou.com%2Fzhaopin%2Fgo%2F%3FfilterOption%3D3; PRE_LAND=https%3A%2F%2Fwww.lagou.com%2Fzhaopin%2Fgo%2F%3FfilterOption%3D3; LGUID=20190702204203-cb0c4db1-9cc6-11e9-a4d5-5254005c3644; SEARCH_ID=3c5957d97d95492f9d38dfcf2d2bc395; X_HTTP_TOKEN=cd6f66d5e45149901032702651b7582a8cac6b473e; _gat=1; LGRID=20190702205821-11cc1d52-9cc9-11e9-bc11-525400f775ce; Hm_lpvt_4233e74dff0ae5bd0a3d81c6ccf756e6=1562072301"},
		{"DNT", "1"},
		{"Host", "www.lagou.com"},
		{"Referer", "https://www.lagou.com/"},
		{"Upgrade-Insecure-Requests", "1"},
		{"User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36"},
	}
	for _, pair := range browserHeaders {
		header.Add(pair[0], pair[1])
	}

	lagou.Register()
}

// lagou is the spider definition for job postings on lagou.com. Its crawl
// logic lives in lagouRuleTree; init calls Register on it.
var lagou = &Spider{
	Name:            "拉勾-岗位",
	Description:     "拉勾上的全部岗位【https://www.lagou.com】",
	RuleTree:        lagouRuleTree,
	EnableCookie:    true,
	NotDefaultField: true,
}

// header holds the HTTP request headers used by this spider's requests. It
// starts out empty and is populated in init.
var header = make(http.Header)
// lagouRuleTree holds the crawl rules for the lagou spider:
//
//   - Root queues the first listing page (positionURL).
//   - "requestList" queues the next listing page, if any, and then hands the
//     current page to "outputResult".
//   - "outputResult" emits one record per job entry found on the page.
//
// Every queued request gets its own header map from lagouHeaderFor. Earlier
// code handed the shared package-level header to every request and rewrote
// its Referer inside the parse function, which changed the headers of
// requests that were already queued and is a data race if the framework
// runs parse functions concurrently.
var lagouRuleTree = &RuleTree{
	Root: func(ctx *Context) {
		ctx.AddQueue(&request.Request{
			Url:      positionURL,
			TryTimes: 1,
			Rule:     "requestList",
			Header:   lagouHeaderFor(""), // keeps the Referer set in init
		})
	},

	Trunk: map[string]*Rule{
		"requestList": {
			ParseFunc: func(ctx *Context) {
				// The last anchor inside the pager is used as the next-page
				// link. Only absolute http(s) links are followed; an empty or
				// non-http href ends pagination.
				pager := ctx.GetDom().Find("div.pager_container")
				next, _ := pager.Find("a").Last().Attr("href")
				if strings.HasPrefix(next, "http") {
					ctx.AddQueue(&request.Request{
						Url:      next,
						TryTimes: 10,
						Rule:     "requestList",
						Priority: 1,
						// The page being parsed is the referer of the next one.
						Header: lagouHeaderFor(ctx.Request.Url),
					})
				}
				ctx.Parse("outputResult")
			},
		},

		"outputResult": {
			// Output columns; the indexes used in ctx.Output below follow
			// this order.
			ItemFields: []string{
				"岗位",   // position
				"薪水",   // salary
				"工作地点", // work location
				"公司",   // company
			},
			ParseFunc: func(ctx *Context) {
				dom := ctx.GetDom()
				dom.Find("div.list_item_top").Each(func(i int, selection *goquery.Selection) {
					top := selection.Find("div.p_top")
					jobName := top.Find("h3").Text()
					// Keep only the part of the location text before the
					// first "·" separator.
					city := strings.Split(top.Find("em").Text(), "·")[0]
					salary := selection.Find("div.p_bot").Find("span.money").Text()
					company := selection.Find("div.company").Find("a").Text()
					ctx.Output(map[int]interface{}{
						0: jobName,
						1: salary,
						2: city,
						3: company,
					})
				})
			},
		},
	},
}

// lagouHeaderFor returns a deep copy of the package-level header. When
// referer is non-empty it replaces the copy's Referer value; the shared
// header itself is never modified.
func lagouHeaderFor(referer string) http.Header {
	h := make(http.Header, len(header))
	for key, values := range header {
		// Copy the value slice as well so the copy shares no backing array.
		h[key] = append([]string(nil), values...)
	}
	if referer != "" {
		h.Set("Referer", referer)
	}
	return h
}
